Round 1: Top-level cell type annotation
- nTconv : CD3+, CD4+, CD8-, CD25-/CD127+, CD45RA+/CD45RO-
- mTconv : CD3+, CD4+, CD8-, CD25-/CD127+, CD45RA-/CD45RO+
- nTreg : CD3+, CD4+, CD8-, CD25+/CD127-, CD45RA+/CD45RO-
- mTreg : CD3+, CD4+, CD8-, CD25+/CD127-, CD45RA-/CD45RO+
mkdir -p result/step2/bbknn/
[ -f result/step2/matrix.batch.gz ] || \
gzip -cd result/step1/matrix_final.cols.gz | \
awk -F'_' '{ print $2 }' | \
gzip -c > result/step2/matrix.batch.gz
[ -f result/step2/bbknn/total.mtx.gz ] || \
mmutil_bbknn \
--mtx result/step1/matrix_final.mtx.gz \
--col result/step1/matrix_final.cols.gz \
--batch result/step2/matrix.batch.gz \
--knn 100 --rank 100 --log_scale \
--out result/step2/bbknn/total \
2> result/step2/bbknn/total.log
mkdir -p result/step2/assignment/
[ -f result/step2/assignment/round1.annot.gz ] || \
mmutil_annotate_col \
--svd_u result/step2/bbknn/total.svd_U.gz \
--svd_d result/step2/bbknn/total.svd_D.gz \
--svd_v result/step2/bbknn/total.factors.gz \
--row result/step1/matrix_final.rows.gz \
--col result/step2/bbknn/total.cols.gz \
--ann marker/surface_round1_positive.txt \
--anti marker/surface_round1_negative.txt \
--em_iter 1000 --kappa_max 100 --em_tol 1e-8 \
--out result/step2/assignment/round1 \
2> result/step2/assignment/round1.log
if ! [ -f result/step2/protein.mtx.gz ] ; then
gzip -cd result/step1/matrix_final.rows.gz | \
awk '/anti_/ || /CCR/ || /CXCR/' | \
gzip > result/step2/protein.select.gz
mmutil_select_row \
result/step1/matrix_final.mtx.gz \
result/step1/matrix_final.rows.gz \
result/step1/matrix_final.cols.gz \
result/step2/protein.select.gz \
result/step2/protein
fi
prot.raw.mtx <- Matrix::readMM("result/step2/protein.mtx.gz") %>% as.matrix()
rownames(prot.raw.mtx) <- .fread("result/step2/protein.rows.gz") %>% unlist
colnames(prot.raw.mtx) <- .fread("result/step2/protein.cols.gz") %>% unlist
features <- .fread("result/step1/matrix_final.rows.gz") %>% unlist
.idx <- which(str_starts(features, "anti_"))
.rows <- features[.idx]
U <- .fread("result/step2/bbknn/total.svd_U.gz") %>% as.matrix
U <- U[.idx, , drop = FALSE]
D <- .fread("result/step2/bbknn/total.svd_D.gz") %>% as.matrix
V <- .fread("result/step2/bbknn/total.factors.gz") %>% as.matrix
prot.mtx <- sweep(U, 2, D, `*`) %*% t(V)
rownames(prot.mtx) <- .rows
colnames(prot.mtx) <- .fread("result/step2/bbknn/total.cols.gz") %>% unlist
.col <- c("tag", "celltype", "prob", "ln.prob")
annot.dt <- .fread("result/step2/assignment/round1.annot.gz", col.names=.col)
annot.dt[, c("barcode","batch") := tstrsplit(tag, split="_")]
.ct <- c("mTreg","nTreg","mTconv","nTconv")
plt <- plt.scatter.ct.2(.ct, annot.dt, prot.raw.mtx)
print(plt)

.file <- fig.dir %&% "/Fig_round1_cdmarker_raw.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)
PDF
.ct <- c("mTreg","nTreg","mTconv","nTconv")
plt <- plt.scatter.ct.2(.ct, annot.dt, pmax(exp(prot.mtx) - 1, 0))
print(plt)

.file <- fig.dir %&% "/Fig_round1_cdmarker_bbknn.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)
PDF
UMAP to confirm cell type assignment results
svd.file <- "result/step2/bbknn/total.factors.gz"
out.file <- "result/step2/umap/bbknn_total.gz"
.umap.dt <- read.umap(svd.file, out.file, npc=10)
.col.file <- "result/step1/matrix_final.cols.gz"
plt <- plot.three.umap(.umap.dt, annot.dt, .col.file)
print(plt)

.file <- fig.dir %&% "/Fig_annotation_umap_bbknn.pdf"
.gg.save(filename = .file, plot = plt, width=3.5, height=8)
PDF
Q/C by marker genes
mkdir -p result/step2/marker/
echo "FOXP3 ID3 BACH2 CXCR3 PRDM1 SGK1 TCF7 LEF1 SELL IL2RA IL7R IKZF2 CCR6 CCR4 CCR7 CTLA4 HLA-DRA anti_CD25 anti_CD127 anti_CD183 anti_CD196 anti_CD197 anti_CD194 anti_CD45RA anti_CD45RO anti_HLA" > result/step2/marker/marker.select
if ! [ -f result/step2/marker/matrix.mtx.gz ] ; then
mmutil_select_row \
result/step1/matrix_final.mtx.gz \
result/step1/matrix_final.rows.gz \
result/step1/matrix_final.cols.gz \
result/step2/marker/marker.select \
result/step2/marker/matrix
fi
.markers <- sort(as.character(unique(marker.dt$marker)))
for(g in .markers){
plt <- plot.marker.umap(g)
print(plt)
.file <- fig.dir %&% "/Fig_marker_" %&% g %&% "_umap.pdf"
.gg.save(filename = .file, plot = plt, width=3.2, height=3)
}

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF
Basic statistics for the first round annotation (29,179 cells)
.hash.info <-
readxl::read_xlsx("data/Hashing list MS Treg project.xlsx", 1) %>%
mutate(hash = str_remove(hash,"#")) %>%
mutate(hash = as.integer(hash)) %>%
rename(Sample = `Cell type`)
.dt <-
annot.dt %>%
mutate(batch=as.integer(batch)) %>%
left_join(fread("data/hashtag.tsv.gz",header=T)) %>%
left_join(.hash.info) %>%
mutate(ct=factor(celltype, c("nTconv","mTconv","nTreg","mTreg"))) %>%
filter(batch != 2, !is.na(ct), !is.na(Sample))
Combining all the experiments

PDF

PDF